In [1]:
import json
import nltk
import string

In [2]:
def _splitAgglomeratedSentence(sentence):
    """Split one tokenized sentence at every '.' directly followed by an uppercase letter."""
    fragments = sentence.split('.')
    pieces = []
    current = fragments[0]
    for fragment in fragments[1:]:
        # The emptiness guard matters: '' is "in" every string, and the text
        # after a trailing period is the empty string.
        if fragment and fragment[0] in string.ascii_uppercase:
            pieces.append(current + '.')
            current = fragment
        else:
            # Not a sentence boundary (e.g. "3.5", "it.16 megapixel" or the
            # final period): put the period back and keep accumulating.
            current += '.' + fragment
    pieces.append(current)
    return pieces


def getSentencesFromReview(reviewContent, sent_detector=None):
    """
    Split the text of a single review into individual sentences.

    The text is first segmented by a sentence tokenizer. Each resulting
    sentence is then split again wherever a period is immediately followed
    by an uppercase ASCII letter, which separates sentences that were typed
    without a space after the period ("...was bad.So I tried...").
    This second pass is purely textual, so strings such as "U.S.A" are split
    as well.

    INPUT:
        reviewContent: the review text; an empty or None value yields an
            empty list.
        sent_detector: optional object exposing ``tokenize(text)`` that
            returns a list of strings. Defaults to NLTK's English Punkt
            tokenizer ('tokenizers/punkt/english.pickle').
    OUTPUT: a flat list of sentence strings, in their original order
    """
    if not reviewContent:
        # Nothing to tokenize (missing or empty review body).
        return []
    if sent_detector is None:
        sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

    final_sentences = []
    for sentence in sent_detector.tokenize(reviewContent):
        final_sentences.extend(_splitAgglomeratedSentence(sentence))
    return final_sentences

In [3]:
# Label menu: maps the key typed at the labeling prompt (the strings
# '1' .. '13') to the category name written to the labeled file.
# The labels are listed in menu order; keys are assigned by position.
category_dict = dict(
    (str(position), label)
    for position, label in enumerate([
        'price',
        'pictures',
        'video',
        'zoom',
        'size',
        'design',
        'battery',
        'screen',
        'detection',
        'ease of use',
        'quality',
        'other features',
        'no feature',
    ], 1)
)

s1: Specify the product id (it is also the name of the review JSON file)


In [4]:
# Product whose reviews will be labeled; the id is also the JSON file name.
product_id = 'B00AQ2BU7E'
# Read the review file inside a `with` block so the handle is closed as soon
# as the JSON has been parsed (the handle was previously left open).
with open('../data/trainingFiles/AmazonReviews/cameras/{0}.json'.format(product_id), 'r') as reviewFile:
    reviews = json.load(reviewFile)

# Single-argument print(...) prints the same text under Python 2.
print('This product has {0} reviews.'.format(len(reviews['Reviews'])))


This product has 45 reviews.

s2: Label the sentences of one review at a time


In [43]:
# Zero-based index into reviews['Reviews'] of the review to label.
# The labeling cell below processes the slice [n:n+1], i.e. exactly this one
# review; change the value and re-run both cells to label another review.
n = 0

In [44]:
# Interactively label every sentence of the selected review and append the
# result to '<product_id>_labeled.txt', one 'category***sentence' line per
# sentence. Each sentence is shown as the prompt; answer with a key of
# category_dict.
output_string = ''
for review in reviews['Reviews'][n:n+1]:
    print(review['Title'])
    sentences = getSentencesFromReview(review['Content'])
    print('This review has {0} sentences.'.format(len(sentences)))
    # Start every review with an empty buffer: the file is appended to once
    # per review, so a buffer shared across reviews would write the earlier
    # reviews' lines again if the slice above were ever widened.
    output_string = ''
    # start labeling
    for sentence in sentences:
        answer = raw_input(sentence).strip()
        # Ask again on an unknown key instead of raising KeyError, which would
        # throw away every label already entered for this review.
        while answer not in category_dict:
            print('Unknown category {0!r}; valid keys: {1}'.format(
                answer,
                # (length, text) orders numeric strings as 1, 2, ..., 10, 11
                ', '.join(sorted(category_dict, key=lambda key: (len(key), key)))))
            answer = raw_input(sentence).strip()
        output_string += category_dict[answer] + '***' + sentence + '\n'
    # save to file; `with` closes the handle even if the write fails
    with open('{0}_labeled.txt'.format(product_id), 'a') as output_file:
        output_file.write(output_string)


Very Disappointed
This review has 25 sentences.
I had read reviews for this camera in many places.13
Most of them were really good so I decided to try it.16 megapixel and 24x optical zoom.4
Now I am not a camera expert, but I do feel like I can use a camera reasonably well and I am used to using different settings.13
I can use a camera beyond just point and shoot.13
The first pictures I took were in my livingroom.13
I wasn't impressed with the focus and lighting.2
So I tried using flash, no flash, indoor setting, auto setting .13
.13
hmmmWell, it was time to go to my daughter's school concert.13
At the school the kids faces all turned out yellow, I had one picture turn out and I don't know why that one did.2
(I even brought it home and took the sd card out to look at it on my computer to see if it was just the camera display, nope the pics were terrible).2
Then I tried to video.13
The faces were yellow on the video too.3
But even worse than that, if that's possible, there is NO stabilization!!!2
!Also, when you zoom in and out it does not stay focused and has to re-adjust the focus constantly!4
So in the middle on the video there is blurry spots where the camera was refocusing.3
Sometimes school events are hard to take pictures at due to the lighting (it was in the gym & I was in the front row)So I tried again at home.2
I had my daughter stand by her granddad.13
He turned out clear her face was blurry.2
I manually adjusted the exposure and ISO bypassing all the presets.13
That sort of helped.13
I did not try the camera outdoors.13
I know some cameras that don't do well indoors do great outside.13
The next day I reset all the factory settings and tried one last time to take a picture in the kitchen of my daughter, it was so blurry.13
That was it, packed it up and took it back.13

In [ ]: